################################
#Make Fractionalization Balance#
################################

# Remember the caller's working directory so it can be restored at the end.
oldwd <- getwd()

# Every relative path below is resolved from the data directory.
setwd("./data")

# Snapshot the names already present in the workspace. This is taken after
# `oldwd` is created, so `oldwd` counts as pre-existing and is still available
# when the final cleanup runs.
old_ws <- ls()

#############################
#Load Fractionalization Data#
#############################

# Kecamatan-level ethnicity/religion table; keyed on `kec` further down.
fractionalization <- fread(
  "./census_2000_ethnicity/00-data/final/20190128_eth_rel_kec_data.csv"
)

###################################
#Load fractionalization crosswalks#
###################################
# Read each crosswalk and rename its columns to the names used downstream:
# `kec_code` is the join column against the fractionalization table and
# `kecamatan_<year>` is the join column against the kecamatan-to-dapil table.
crosswalk_2004 <- fread(
  "./crosswalks/kecamatan_dprd_2004_to_fractionalization.csv"
)
setnames(
  crosswalk_2004,
  old = c("target_kecamatan", "from_kecamatan"),
  new = c("kec_code", "kecamatan_2004")
)
setkey(crosswalk_2004, kec_code)

crosswalk_2009 <- fread(
  "./crosswalks/kecamatan_dprd_2009_to_fractionalization.csv"
)
setnames(
  crosswalk_2009,
  old = c("target_kecamatan", "from_kecamatan"),
  new = c("kec_code", "kecamatan_2009")
)
setkey(crosswalk_2009, kec_code)

setkey(fractionalization, kec)

# Keyed X[Y] joins (kec_code == kec): every fractionalization row is kept;
# rows without a crosswalk match get NA in the crosswalk columns.
frac_2004 <- crosswalk_2004[fractionalization]
frac_2009 <- crosswalk_2009[fractionalization]


#Load kecamatan to DPRD crosswalks
##################################

# 2004: build the kecamatan -> dapil lookup
kec_to_dprd_2004 <- fread("./crosswalks/kecamatan_to_dprd2_2004.csv")
setnames(kec_to_dprd_2004, old = "DAPIL.NUMBER", new = "DAPIL_NUMBER")

# Recode kecamatan number 1 to 10 before the id is assembled.
# NOTE(review): the reason for this recode is not visible here -- confirm.
kec_to_dprd_2004[KECA %in% 1, KECA := 10]

# id_kec = PROP followed by KABU zero-padded to 2 digits and KECA zero-padded
# to 3 digits; dapil = kabupaten name and dapil number separated by a space.
kec_to_dprd_2004[, `:=`(
  id_kec = paste0(
    PROP,
    sprintf("%02.f", as.numeric(KABU)),
    sprintf("%03.f", as.numeric(KECA))
  ),
  dapil = paste(KAB_NAME, DAPIL_NUMBER)
)]

# Keep only rows that have a dapil number, and only the columns needed later,
# under the same names the 2009 lookup uses.
kec_to_dprd_2004 <- kec_to_dprd_2004[
  !is.na(DAPIL_NUMBER),
  .(
    provinsi = PROV_NAME,
    id_prov = PROP,
    kabupaten = KAB_NAME,
    id_kab = KAB_CODE,
    id_kec = as.numeric(id_kec),
    dapil = dapil
  )
]

# Rows labelled IRIAN JAYA BARAT get province id 91.
kec_to_dprd_2004[provinsi %in% "IRIAN JAYA BARAT", id_prov := 91]


# 2009: keep the needed columns, renamed to match the 2004 lookup
kec_to_dprd_2009 <- fread("./crosswalks/kecamatan_to_dprd2_2009.csv")
kec_to_dprd_2009 <- kec_to_dprd_2009[
  ,
  .(
    provinsi = provinsi,
    id_prov = provno,
    kabupaten = kabkot,
    id_kab = id_kab,
    id_kec = id_kec,
    dapil = label
  )
]

# Merge in dapil codes
# ---------------------
# Keyed X[Y] joins: every row of the kecamatan-to-dapil lookup is kept, and
# lookup rows without a matching fractionalization row carry NA in the
# fractionalization columns.

# 2004
setkey(frac_2004, kecamatan_2004)
setkey(kec_to_dprd_2004, id_kec)
frac_2004 <- frac_2004[kec_to_dprd_2004]

# 2009
setkey(frac_2009, kecamatan_2009)
setkey(kec_to_dprd_2009, id_kec)
frac_2009 <- frac_2009[kec_to_dprd_2009]

# Flag duplicate pre-treatment kecamatan.
#   drop          - TRUE for every repeat of a kec_code within one
#                   (id_kab, dapil) group; the first occurrence stays FALSE
#   kec_use_count - number of rows in the whole table sharing that kec_code
#   kab_2000      - first four characters of kec_code
#                   (presumably the kabupaten part of the code -- confirm)

# 2004
frac_2004[, drop := duplicated(kec_code), by = c("id_kab", "dapil")]
frac_2004[, kec_use_count := .N, by = "kec_code"]
frac_2004[, kab_2000 := str_sub(kec_code, 1, 4)]

# 2009
frac_2009[, drop := duplicated(kec_code), by = c("id_kab", "dapil")]
frac_2009[, kec_use_count := .N, by = "kec_code"]
frac_2009[, kab_2000 := str_sub(kec_code, 1, 4)]

# Collapse
# Both election cycles are collapsed in exactly the same way, so the
# aggregation lives in one helper instead of two copies that could drift apart.
# The helper is a temporary object: it is not listed in `old_ws`, so the
# workspace cleanup at the end of this script removes it.
#
# dt: kecamatan-level table with columns drop, kec_code, kec_pop_total,
#     kec_ethfractvil, kec_relfractvil, kec_ethseg_d, kec_relseg_d, kab_2000
#     and the grouping columns provinsi, id_prov, kabupaten, id_kab, dapil.
# Returns a new table with one row per
# (provinsi, id_prov, kabupaten, id_kab, dapil) holding the
# population-weighted mean of each index and the most frequent kab_2000.
collapse_frac_to_dapil <- function(dt) {
  dt[
    # Skip repeated kecamatan and rows with no kecamatan code
    !(drop) & !is.na(kec_code),
    list(
      ethfractvil = weighted.mean(kec_ethfractvil, kec_pop_total),
      relfractvil = weighted.mean(kec_relfractvil, kec_pop_total),
      ethseg_d = weighted.mean(kec_ethseg_d, kec_pop_total),
      relseg_d = weighted.mean(kec_relseg_d, kec_pop_total),
      # Most frequent kab_2000 value in the group (name of the top table cell)
      kab_2000 = kab_2000 %>%
        table %>%
        sort(decreasing = TRUE) %>%
        .[1] %>%
        names()
    ),
    by = list(provinsi, id_prov, kabupaten, id_kab, dapil)
  ]
}

frac_2004 <- collapse_frac_to_dapil(frac_2004)
frac_2009 <- collapse_frac_to_dapil(frac_2009)

# Combine data: tag each dapil-level table with its election cycle, then stack
# the two tables, matching columns by name.
frac_2004[, `:=`(election_cycle = 2004)]
frac_2009[, `:=`(election_cycle = 2009)]

frac_dapil_balance <- rbindlist(
  list(frac_2004, frac_2009),
  use.names = TRUE
)

#########################
#Merge in dapil clusters#
#########################
# Attach the cluster id of each dapil.
dapil_clusters <- fread("./crosswalks/dapil_clusters.csv")

setkey(dapil_clusters, election_cycle, id_kab, dapil)
setkey(frac_dapil_balance, election_cycle, id_kab, dapil)

# Keyed X[Y] join: every row of frac_dapil_balance is kept; dapils with no
# entry in the cluster file come back with cluster == NA.
frac_dapil_balance <- dapil_clusters[frac_dapil_balance]

# Give each unmatched dapil its own negative cluster id (-1, -2, ...).
# seq_len() is used instead of -1:-.N because the latter evaluates to c(-1, 0)
# when no row is unmatched (.N == 0); seq_len(0) is simply empty.
frac_dapil_balance[is.na(cluster), cluster := -seq_len(.N)]

# Remove every object this script created except the result table. The list of
# names is computed inline rather than stored in a variable: a stored list is
# created after ls() runs, so it would never contain itself and would be left
# behind in the workspace (previously a stray `drop` object).
rm(list = setdiff(ls(), c(old_ws, "frac_dapil_balance")))

# Restore the working directory saved at the top of the script.
setwd(oldwd)